Create cohorts of 60 for genotype calling

Multi-breed GRM

# Build a lookup table with one row per sample: `pop` (breed label) and `id`.
# Every .fam file in ds_plink/ is read, and the breed label is derived from
# the file name it came from.
breed_key <- 
  list.files(
    here::here("data/derived_data/sample_selection/ds_plink/"),
    # `pattern` is a regular expression: escape the dot and anchor at the end
    # so only names that really end in ".fam" are matched
    pattern = "\\.fam$",
    full.names = TRUE
  ) %>% 
  # name each path by its file name without the final extension; map_df()
  # copies these names into the `pop` column through `.id`
  set_names(nm = (basename(.) %>%
                    tools::file_path_sans_ext())) %>% 
  map_df(read_table2,
         col_names = FALSE, .id = "pop") %>% 
  # the first .fam column is used as the sample id
  rename(id = X1) %>% 
  select(pop, id) %>% 
  # strip the ".ds_plink" part that remains from the file name
  mutate(pop = str_remove(pop, "\\.ds_plink"))
## select: dropped 5 variables (X2, X3, X4, X5, X6) 
## mutate: changed 1039 values (100%) of 'pop' (0 new NA)
# Read the multi-breed GRM (no header row), label its rows and columns with
# the ids from the matching .fam file, reshape it to long format (one row per
# ind1/ind2 pair) and attach the breed of each individual from `breed_key`.
big_grm_long <- 
  read_table2(
    here::here("data/derived_data/sample_selection/ds_grm_big/ds_grm_big.sXX.txt"),
    col_names = FALSE
  ) %>%
  # keep only numeric columns; per the log this drops the trailing extra
  # column (X1040) that is not part of the matrix
  select_if(is.numeric) %>%
  # read in the fam file and attach its first column (international ids)
  # as `ind1`
  bind_cols(
    read_table2(
      here::here("data/derived_data/sample_selection/ds_plink_big/ds_plink_big.fam"),
      col_names = FALSE
    ) %>%
      select(X1) %>%
      rename(ind1 = X1)
  ) %>%
  # make row names international id
  tibble::column_to_rownames("ind1") %>%
  # make column names international id; this assumes the GRM columns are in
  # the same order as its rows (i.e. the .fam order)
  rlang::set_names(rownames(.)) %>%
  tibble::rownames_to_column(var = "ind1") %>%
  # wide -> long; `id.vars` is spelled out instead of relying on partial
  # matching of `id`
  reshape2::melt(id.vars = "ind1") %>%
  rename(ind2 = variable) %>%
  left_join(breed_key, by = c("ind1" = "id")) %>% 
  # NOTE(review): Limousin is removed only where it appears as ind1; rows
  # whose ind2 is Limousin are kept. Confirm this asymmetry is intended.
  filter(pop != "limousin") %>% 
  rename(ind1_pop = pop) %>% 
  left_join(breed_key, by = c("ind2" = "id")) %>% 
  rename(ind2_pop = pop)
## select_if: dropped one variable (X1040) 
## select: dropped 5 variables (X2, X3, X4, X5, X6) 
## left_join: added 0 rows and added one column (pop) 
## filter: removed 52989 rows (5%) 
## left_join: added 0 rows and added one column (pop)

Plot multi-breed GRM

# Heatmap of the multi-breed GRM: one tile per (ind1, ind2) row of
# big_grm_long, coloured by its relatedness value.
ggplot(
  data = big_grm_long,
  mapping = aes(x = ind1, y = ind2, fill = value)
) +
  geom_tile() +
  # inferno palette with the direction reversed
  viridis::scale_fill_viridis(direction = -1, option = "inferno") +
  # individual ids are not shown on either axis
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(
    title = "Pairwise genomic relatedness pre-pruning: multi-breed (all individuals)",
    x = "Individual 1",
    y = "Individual 2"
  )

# no plot is passed, so ggsave() falls back to the last plot displayed
ggsave(filename = here::here("figures/sample_selection/big_grm.png"))
## Saving 7 x 5 in image

Try re-plotting excluding Brahman

# Heatmap of the multi-breed GRM after removing every pair in which either
# individual is Brahman.
big_grm_long %>%
  # the two filters are kept as separate steps so each is logged on its own
  filter(ind1_pop != "brahman") %>% 
  filter(ind2_pop != "brahman") %>% 
  ggplot(mapping = aes(x = ind1, y = ind2, fill = value)) +
  geom_tile() +
  # inferno palette with the direction reversed
  viridis::scale_fill_viridis(direction = -1, option = "inferno") +
  # individual ids are not shown on either axis
  theme(
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank()
  ) +
  labs(
    # long title, wrapped so it fits the default figure width
    title = str_wrap("Pairwise genomic relatedness pre-pruning: multi-breed (all individuals excluding Brahman)", width = 45),
    x = "Individual 1",
    y = "Individual 2"
  )
## filter: removed 62340 rows (6%) 
## filter: removed 55680 rows (6%)

# no plot is passed, so ggsave() falls back to the last plot displayed
ggsave(filename = here::here("figures/sample_selection/big_grm_no_brm.png"))
## Saving 7 x 5 in image

Plot multi-breed GRM split by breed

# plot_big_grm() is run once per breed purely for its side effects (the log
# shows it filtering and saving an image on every call), so walk() is used
# instead of map(): the function is still called for each breed, but no list
# of NULL return values is printed afterwards.
walk(
  c("holstein", "angus", "simmental", "jersey", "hereford", "charolais"),
  plot_big_grm
)
## filter: removed 62340 rows (6%) 
## filter: removed 55680 rows (6%) 
## filter: removed 666699 rows (73%) 
## filter: removed 180804 rows (75%)
## Saving 7 x 5 in image
## filter: removed 62340 rows (6%) 
## filter: removed 55680 rows (6%) 
## filter: removed 641245 rows (71%) 
## filter: removed 192738 rows (72%)
## Saving 7 x 5 in image
## filter: removed 62340 rows (6%) 
## filter: removed 55680 rows (6%) 
## filter: removed 779284 rows (86%) 
## filter: removed 111804 rows (87%)
## Saving 7 x 5 in image
## filter: removed 62340 rows (6%) 
## filter: removed 55680 rows (6%) 
## filter: removed 812570 rows (89%) 
## filter: removed 86338 rows (90%)
## Saving 7 x 5 in image
## filter: removed 62340 rows (6%) 
## filter: removed 55680 rows (6%) 
## filter: removed 819423 rows (90%) 
## filter: removed 80808 rows (91%)
## Saving 7 x 5 in image
## filter: removed 62340 rows (6%) 
## filter: removed 55680 rows (6%) 
## filter: removed 823339 rows (91%) 
## filter: removed 77604 rows (91%)
## Saving 7 x 5 in image
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL

Plot distribution of GRM values

  • With BRM
# Ridge plot of within-breed GRM values (Brahman included): one density
# ridge per breed, with breeds ordered by their mean value.
big_grm_long %>% 
  # keep pairs where both individuals belong to the same breed
  filter(ind1_pop == ind2_pop) %>% 
  # drop self-pairs (the matrix diagonal)
  filter(ind1 != ind2) %>% 
  mutate(ind1_pop = str_to_title(ind1_pop)) %>% 
  ggplot(aes(x = value,
             y = forcats::fct_reorder(ind1_pop, value, mean),
             fill = ind1_pop)) +
  ggridges::geom_density_ridges(alpha = 0.5) +
  # "none" drops the fill legend; passing FALSE here is deprecated
  guides(fill = "none") +
  # overall mean of the plotted values, drawn as ONE line: the layer gets a
  # one-row data frame instead of mapping mean(value) onto every row, which
  # would draw an identical line per observation
  geom_vline(
    data = function(d) data.frame(value = mean(d$value)),
    aes(xintercept = value)
  ) +
  labs(x = "GRM value", y = "Kernel density", title = str_wrap("Density of pairwise genomic relatedness pre-pruning by breed (Brahman included)", width = 50))
## filter: removed 844516 rows (82%) 
## filter: removed 988 rows (1%) 
## mutate: changed 181028 values (100%) of 'ind1_pop' (0 new NA)
## Picking joint bandwidth of 0.0218

# Save the last displayed plot. ggplot2::ggsave() is called directly so this
# matches the other ggsave() calls in this file and does not depend on
# cowplot exporting its own ggsave().
# NOTE(review): newer cowplot releases appear to have replaced ggsave() with
# ggsave2() - confirm against the installed version.
ggplot2::ggsave(here::here("figures/sample_selection/big_grm_density_w_brm.png"), width = 10, height = 7, dpi = 500)
## Picking joint bandwidth of 0.0218
  • Without BRM
# Ridge plot of within-breed GRM values with Brahman removed: one density
# ridge per breed, with breeds ordered by their mean value.
big_grm_long %>% 
  # keep pairs where both individuals belong to the same breed
  filter(ind1_pop == ind2_pop) %>% 
  # drop self-pairs (the matrix diagonal)
  filter(ind1 != ind2) %>% 
  # the second Brahman filter removes nothing after the same-breed filter
  # (see the log); it is kept so the logged steps stay the same
  filter(ind1_pop != "brahman") %>% 
  filter(ind2_pop != "brahman") %>% 
  mutate(ind1_pop = str_to_title(ind1_pop)) %>% 
  ggplot(aes(x = value,
             y = forcats::fct_reorder(ind1_pop, value, mean),
             fill = ind1_pop)) +
  ggridges::geom_density_ridges(alpha = 0.5) +
  # "none" drops the fill legend; passing FALSE here is deprecated
  guides(fill = "none") +
  # overall mean of the plotted values, drawn as ONE line: the layer gets a
  # one-row data frame instead of mapping mean(value) onto every row, which
  # would draw an identical line per observation
  geom_vline(
    data = function(d) data.frame(value = mean(d$value)),
    aes(xintercept = value)
  ) +
  labs(x = "GRM value", y = "Kernel density", fill = "Breed", title = str_wrap("Density of pairwise genomic relatedness pre-pruning by breed (Brahman excluded)", width = 50))
## filter: removed 844516 rows (82%) 
## filter: removed 988 rows (1%) 
## filter: removed 3540 rows (2%) 
## filter: no rows removed 
## mutate: changed 177488 values (100%) of 'ind1_pop' (0 new NA)
## Picking joint bandwidth of 0.0063

# Save the last displayed plot. ggplot2::ggsave() is called directly so this
# matches the other ggsave() calls in this file and does not depend on
# cowplot exporting its own ggsave().
# NOTE(review): newer cowplot releases appear to have replaced ggsave() with
# ggsave2() - confirm against the installed version.
ggplot2::ggsave(here::here("figures/sample_selection/big_grm_density_no_brm.png"), width = 10, height = 7, dpi = 500)
## Picking joint bandwidth of 0.0063

Probing multi-breed GRM

Top 200 highest values

  • Including diagonal
# 200 largest GRM entries among pairs with no Brahman individual, sorted
# from highest to lowest. Self-pairs (ind1 == ind2) are included, and each
# off-diagonal pair appears twice, once in each orientation.
big_grm_long %>% 
  filter(ind1_pop != "brahman") %>% 
  filter(ind2_pop != "brahman") %>% 
  top_n(n = 200, wt = value) %>% 
  arrange(-value)
## filter: removed 62340 rows (6%) 
## filter: removed 55680 rows (6%)
##                    ind1                ind2     value  ind1_pop  ind2_pop
## 1   CHAUSAM000VPM122718 CHAUSAM000VPM122718 2.5706042 charolais charolais
## 2   UMCUSAU000000194370 UMCUSAU000000194370 2.0339775 charolais charolais
## 3   CHAUSAM00000M225504 CHAUSAM00000M225504 1.3357560 charolais charolais
## 4   CHAUSAM00000M319454 CHAUSAM00000M319454 1.2878654 charolais charolais
## 5   CHAUSAM000VPM122718 CHAUSAM00000M225504 1.2679501 charolais charolais
## 6   CHAUSAM00000M225504 CHAUSAM000VPM122718 1.2679501 charolais charolais
## 7   CHAUSAM000VPM122718 CHAUSAM00000M319454 1.1819029 charolais charolais
## 8   CHAUSAM00000M319454 CHAUSAM000VPM122718 1.1819029 charolais charolais
## 9   CHAUSAM00000M234430 CHAUSAM00000M234430 1.0737484 charolais charolais
## 10  UMCUSAU000000194368 UMCUSAU000000194368 1.0615928 charolais charolais
## 11  UMCUSAU000000194369 UMCUSAU000000194369 0.9810364 charolais charolais
## 12  CHACANM0000RMC30160 CHACANM0000RMC30160 0.9398123 charolais charolais
## 13  CHAUSAM00000M319454 CHAUSAM00000M225504 0.9378336 charolais charolais
## 14  CHAUSAM00000M225504 CHAUSAM00000M319454 0.9378336 charolais charolais
## 15  UMCUSAU000000194657 UMCUSAU000000194657 0.9271013    jersey    jersey
## 16  CHACANM0000MC106929 CHACANM0000MC106929 0.9252602 charolais charolais
## 17  CHACANM0000MC236083 CHACANM0000MC236083 0.8828932 charolais charolais
## 18  UMCUSAU000000194349 UMCUSAU000000194349 0.8775803     angus     angus
## 19  UMCUSAM000000196786 UMCUSAM000000196786 0.8730899  holstein  holstein
## 20  UMCUSAM000000196776 UMCUSAM000000196776 0.8587716  holstein  holstein
## 21  RANUSAM000003358146 RANUSAM000003358146 0.8371725 simmental simmental
## 22  UMCUSAU000000198102 UMCUSAU000000198102 0.8320777  hereford  hereford
## 23  HOL840M003131131453 HOL840M003131131453 0.8317153  holstein  holstein
## 24  CHAUSAM0000MC190391 CHAUSAM0000MC190391 0.8270637 charolais charolais
## 25  UMCUSAF000000199724 UMCUSAF000000199724 0.8238097  hereford  hereford
## 26  CHACANM0000MC293022 CHACANM0000MC293022 0.8134152 charolais charolais
## 27  SIMCANM000000361254 SIMCANM000000361254 0.8078976 simmental simmental
## 28  UMCUSAM000000198548 UMCUSAM000000198548 0.7985101 simmental simmental
## 29  UMCUSAU000000194332 UMCUSAU000000194332 0.7934186     angus     angus
## 30  SIMUSAM000002068996 SIMUSAM000002068996 0.7900628 simmental simmental
## 31  CHAFRAM005882101816 CHAFRAM005882101816 0.7876660 charolais charolais
## 32  HERUSAM000014661058 HERUSAM000014661058 0.7842708  hereford  hereford
## 33  HOLUSAM003126477819 HOLUSAM003126477819 0.7809691  holstein  holstein
## 34  UMCUSAM000000196847 UMCUSAM000000196847 0.7792097  holstein  holstein
## 35  CHAUSAM000VPM122718 CHAUSAM00000M234430 0.7694200 charolais charolais
## 36  CHAUSAM00000M234430 CHAUSAM000VPM122718 0.7694200 charolais charolais
## 37  SIMUSAM000001716917 SIMUSAM000001716917 0.7689839 simmental simmental
## 38  CHACANM0000MC299727 CHACANM0000MC299727 0.7665262 charolais charolais
## 39  UMCUSAU000000194752 UMCUSAU000000194752 0.7616109 simmental simmental
## 40  CHACANM0000MC226738 CHACANM0000MC226738 0.7569395 charolais charolais
## 41  SIMUSAM000001907954 SIMUSAM000001907954 0.7530423 simmental simmental
## 42  HOL840M003129016258 HOL840M003129016258 0.7520021  holstein  holstein
## 43  UMCUSAU000000194373 UMCUSAU000000194373 0.7504952 charolais charolais
## 44  SIMDEUM000919598352 SIMDEUM000919598352 0.7488988 simmental simmental
## 45  HOLUSAM000070626136 HOLUSAM000070626136 0.7458174  holstein  holstein
## 46  HOLCANM000109538236 HOLCANM000109538236 0.7401696  holstein  holstein
## 47  UMCUSAU000000194847 UMCUSAU000000194847 0.7389349 simmental simmental
## 48  UMCUSAU000000194805 UMCUSAU000000194805 0.7373065 simmental simmental
## 49  UMCUSAU000000194372 UMCUSAU000000194372 0.7356412 charolais charolais
## 50  UMCUSAM000000196842 UMCUSAM000000196842 0.7320852  holstein  holstein
## 51  HOLUSAM000002070579 HOLUSAM000002070579 0.7304240  holstein  holstein
## 52  UMCUSAU000000194820 UMCUSAU000000194820 0.7170784 charolais charolais
## 53  SIMUSAM000001937373 SIMUSAM000001937373 0.7163587 simmental simmental
## 54  JERUSAM000000652501 JERUSAM000000652501 0.7162488    jersey    jersey
## 55  UMCUSAU000000194623 UMCUSAU000000194623 0.7150313    jersey    jersey
## 56  HOLUSAM000072826907 HOLUSAM000072826907 0.7137188  holstein  holstein
## 57  SIMCANM000000216954 SIMCANM000000216954 0.7134835 simmental simmental
## 58  HOLFRAM004493050102 HOLFRAM004493050102 0.7134118  holstein  holstein
## 59  UMCUSAM000000196784 UMCUSAM000000196784 0.7113015  holstein  holstein
## 60  SIMDEUM007600026785 SIMDEUM007600026785 0.7097411 simmental simmental
## 61  UMCUSAU000000194623 JERUSAM000000652501 0.7091562    jersey    jersey
## 62  JERUSAM000000652501 UMCUSAU000000194623 0.7091562    jersey    jersey
## 63  CHAUSAM00000M434790 CHAUSAM00000M434790 0.7081365 charolais charolais
## 64  HOLUSAM003009329221 HOLUSAM003009329221 0.6998858  holstein  holstein
## 65  HERUSAM000042800895 HERUSAM000042800895 0.6955825  hereford  hereford
## 66  SIMDEUM000933038755 SIMDEUM000933038755 0.6930524 simmental simmental
## 67  HOLUSAM000072495715 HOLUSAM000072495715 0.6924689  holstein  holstein
## 68  UMCUSAF000000109173 UMCUSAF000000109173 0.6920664  holstein  holstein
## 69  JER199M000071199883 JER199M000071199883 0.6917290    jersey    jersey
## 70  UMCUSAU000000194669 UMCUSAU000000194669 0.6914219    jersey    jersey
## 71  UMCUSAU000000194643 UMCUSAU000000194643 0.6876487    jersey    jersey
## 72  UMCUSAM000000196808 UMCUSAM000000196808 0.6873916  holstein  holstein
## 73  UMCUSAU000000194652 UMCUSAU000000194652 0.6854573    jersey    jersey
## 74  HERCANM000000C01369 HERCANM000000C01369 0.6854528  hereford  hereford
## 75  SIMDEUM000915040032 SIMDEUM000915040032 0.6853930 simmental simmental
## 76  UMCUSAU000000194718 UMCUSAU000000194718 0.6814544     angus     angus
## 77  UMCUSAM000000196803 UMCUSAM000000196803 0.6809952  holstein  holstein
## 78  UMCUSAU000000194830 UMCUSAU000000194830 0.6803465  hereford  hereford
## 79  CHACANM0000MC250513 CHACANM0000MC250513 0.6795258 charolais charolais
## 80  HERCANM000C02789138 HERCANM000C02789138 0.6786716  hereford  hereford
## 81  SIMUSAM000000320938 SIMUSAM000000320938 0.6770065 simmental simmental
## 82  HOLUSAM000074284017 HOLUSAM000074284017 0.6768242  holstein  holstein
## 83  UMCUSAU000000194250 UMCUSAU000000194250 0.6767167     angus     angus
## 84  HOLUSAM003128824393 HOLUSAM003128824393 0.6757989  holstein  holstein
## 85  UMCUSAU000000194335 UMCUSAU000000194335 0.6754384    jersey    jersey
## 86  SIMUSAM000001818026 SIMUSAM000001818026 0.6748551 simmental simmental
## 87  SIMUSAM000002240471 SIMUSAM000002240471 0.6717169 simmental simmental
## 88  UMCUSAU000000198090 UMCUSAU000000198090 0.6715470 simmental simmental
## 89  UMCUSAU000000194185 UMCUSAU000000194185 0.6714456  holstein  holstein
## 90  UMCUSAM000000196827 UMCUSAM000000196827 0.6709645  holstein  holstein
## 91  CHAUSAM00000M411450 CHAUSAM00000M411450 0.6709625 charolais charolais
## 92  UMCUSAU000000194670 UMCUSAU000000194670 0.6708354    jersey    jersey
## 93  UMCUSAM000000087954 UMCUSAM000000087954 0.6707806  hereford  hereford
## 94  UMCUSAU000000194760 UMCUSAU000000194760 0.6705456  hereford  hereford
## 95  UMCUSAU000000194840 UMCUSAU000000194840 0.6694463 simmental simmental
## 96  UMCUSAU000000194665 UMCUSAU000000194665 0.6687646    jersey    jersey
## 97  CHAUSAM00000M318119 CHAUSAM00000M318119 0.6674392 charolais charolais
## 98  UMCUSAU000000194718 UMCUSAU000000194250 0.6666666     angus     angus
## 99  UMCUSAU000000194250 UMCUSAU000000194718 0.6666666     angus     angus
## 100 UMCUSAM000000092750 UMCUSAM000000092750 0.6659566 charolais charolais
## 101 SIMUSAM000000006084 SIMUSAM000000006084 0.6646682 simmental simmental
## 102 SIMDEUM000929189864 SIMDEUM000929189864 0.6643219 simmental simmental
## 103 UMCUSAU000000194663 UMCUSAU000000194663 0.6638102    jersey    jersey
## 104 UMCUSAU000000194666 UMCUSAU000000194666 0.6634736    jersey    jersey
## 105 HOLUSAM000074564764 HOLUSAM000074564764 0.6634068  holstein  holstein
## 106 UMCUSAM000000196831 UMCUSAM000000196831 0.6623711  holstein  holstein
## 107 SIMDEUM000912851741 SIMDEUM000912851741 0.6612683 simmental simmental
## 108 UMCUSAU000000194829 UMCUSAU000000194829 0.6599993 simmental simmental
## 109 UMCUSAM000000198521 UMCUSAM000000198521 0.6592910     angus     angus
## 110 UMCUSAU000000198082 UMCUSAU000000198082 0.6590175 simmental simmental
## 111 SIMDEUM000918912889 SIMDEUM000918912889 0.6586103 simmental simmental
## 112 SIMDEUM000932739095 SIMDEUM000932739095 0.6581896 simmental simmental
## 113 UMCUSAM000000198543 UMCUSAM000000198543 0.6572667 simmental simmental
## 114 HERCANM000C02698670 HERCANM000C02698670 0.6563364  hereford  hereford
## 115 UMCUSAM000000198530 UMCUSAM000000198530 0.6560225  hereford  hereford
## 116 UMCUSAF000000199727 UMCUSAF000000199727 0.6559463  hereford  hereford
## 117 UMCUSAU000000194266 UMCUSAU000000194266 0.6556448     angus     angus
## 118 HOL840M003130854065 HOL840M003130854065 0.6548750  holstein  holstein
## 119 UMCUSAM000000196812 UMCUSAM000000196812 0.6539292  holstein  holstein
## 120 HERUSAM000015587538 HERUSAM000015587538 0.6524675  hereford  hereford
## 121 JERAUSM000A00000810 JERAUSM000A00000810 0.6521554    jersey    jersey
## 122 UMCUSAU000000194266 UMCUSAM000000198521 0.6518054     angus     angus
## 123 UMCUSAM000000198521 UMCUSAU000000194266 0.6518054     angus     angus
## 124 UMCUSAU000000194613 UMCUSAU000000194613 0.6512466    jersey    jersey
## 125 HOLUSAM000074228150 HOLUSAM000074228150 0.6505504  holstein  holstein
## 126 UMCUSAU000000194616 UMCUSAU000000194616 0.6504354    jersey    jersey
## 127 HOLFRAM005694028588 HOLFRAM005694028588 0.6497807  holstein  holstein
## 128 UMCUSAU000000194637 UMCUSAU000000194637 0.6491219    jersey    jersey
## 129 SIMCANM000000248382 SIMCANM000000248382 0.6490407 simmental simmental
## 130 JERAUSM000A00008529 JERAUSM000A00008529 0.6489935    jersey    jersey
## 131 UMCUSAU000000194750 UMCUSAU000000194750 0.6483195 charolais charolais
## 132 AANUSAM000016447771 AANUSAM000016447771 0.6482150     angus     angus
## 133 UMCUSAU000000194831 UMCUSAU000000194831 0.6480902 charolais charolais
## 134 UMCUSAU000000194613 JERAUSM000A00000810 0.6477522    jersey    jersey
## 135 JERAUSM000A00000810 UMCUSAU000000194613 0.6477522    jersey    jersey
## 136 UMCUSAU000000198089 UMCUSAU000000198089 0.6476445 simmental simmental
## 137 UMCUSAU000000194783 UMCUSAU000000194783 0.6469001  hereford  hereford
## 138 UMCUSAU000000194658 UMCUSAU000000194658 0.6459441    jersey    jersey
## 139 UMCUSAU000000194185 HOLFRAM004493050102 0.6456350  holstein  holstein
## 140 HOLFRAM004493050102 UMCUSAU000000194185 0.6456350  holstein  holstein
## 141 SIMDEUM000979317838 SIMDEUM000979317838 0.6455098 simmental simmental
## 142 UMCUSAU000000194617 UMCUSAU000000194617 0.6452341    jersey    jersey
## 143 UMCUSAU000000194573 UMCUSAU000000194573 0.6445948  holstein  holstein
## 144 UMCUSAU000000194616 JERAUSM000A00008529 0.6443452    jersey    jersey
## 145 JERAUSM000A00008529 UMCUSAU000000194616 0.6443452    jersey    jersey
## 146 HOL840M003124584834 HOL840M003124584834 0.6439167  holstein  holstein
## 147 JERAUSM000A00011730 JERAUSM000A00011730 0.6438515    jersey    jersey
## 148 HOLUSAM003125201927 HOLUSAM003125201927 0.6436275  holstein  holstein
## 149 UMCUSAM000000196763 UMCUSAM000000196763 0.6433089  holstein  holstein
## 150 CHACANM00000FMC5641 CHACANM00000FMC5641 0.6430507 charolais charolais
## 151 UMCUSAU000000194341 UMCUSAU000000194341 0.6427181    jersey    jersey
## 152 SIMUSAM000000000001 SIMUSAM000000000001 0.6395406 simmental simmental
## 153 AANUSAM000018365756 AANUSAM000018365756 0.6385023     angus     angus
## 154 UMCUSAU000000194617 JERAUSM000A00011730 0.6384078    jersey    jersey
## 155 JERAUSM000A00011730 UMCUSAU000000194617 0.6384078    jersey    jersey
## 156 UMCUSAU000000194625 UMCUSAU000000194625 0.6381224    jersey    jersey
## 157 UMCUSAU000000194827 UMCUSAU000000194827 0.6372269 charolais charolais
## 158 UMCUSAU000000194273 UMCUSAU000000194273 0.6361041  hereford  hereford
## 159 UMCUSAU000000194842 UMCUSAU000000194842 0.6360503 simmental simmental
## 160 HERCANM000C02728663 HERCANM000C02728663 0.6356617  hereford  hereford
## 161 UMCUSAU000000194786 UMCUSAU000000194786 0.6345374 charolais charolais
## 162 SIMDEUM000938263111 SIMDEUM000938263111 0.6344897 simmental simmental
## 163 UMCUSAM000000196826 UMCUSAM000000196826 0.6339579  holstein  holstein
## 164 UMCUSAU000000194621 UMCUSAU000000194621 0.6329732    jersey    jersey
## 165 CHAUSAM00000M297007 CHAUSAM00000M297007 0.6327429 charolais charolais
## 166 SIMUSAM000002144976 SIMUSAM000002144976 0.6324821 simmental simmental
## 167 HOLUSAM000071451889 HOLUSAM000071451889 0.6323924  holstein  holstein
## 168 JERUSAM000000650436 JERUSAM000000650436 0.6323838    jersey    jersey
## 169 SIMUSAM000000000010 SIMUSAM000000000010 0.6319438 simmental simmental
## 170 CHAFRAM005872122876 CHAFRAM005872122876 0.6311314 charolais charolais
## 171 HERCANM000C02738219 HERCANM000C02738219 0.6308122  hereford  hereford
## 172 SIMUSAM000001282876 SIMUSAM000001282876 0.6307437 simmental simmental
## 173 UMCUSAU000000194738 UMCUSAU000000194738 0.6305916  hereford  hereford
## 174 AANUSAM000009506886 AANUSAM000009506886 0.6304343     angus     angus
## 175 SIMDEUM000912851233 SIMDEUM000912851233 0.6302727 simmental simmental
## 176 UMCUSAU000000194660 UMCUSAU000000194660 0.6299851    jersey    jersey
## 177 HOLUSAM003125519831 HOLUSAM003125519831 0.6299163  holstein  holstein
## 178 SIMCANM000000293252 SIMCANM000000293252 0.6298893 simmental simmental
## 179 SIMDEUM000913325437 SIMDEUM000913325437 0.6297151 simmental simmental
## 180 HOL840M003128792954 HOL840M003128792954 0.6296093  holstein  holstein
## 181 UMCUSAU000000198080 UMCUSAU000000198080 0.6293500 simmental simmental
## 182 AANUSAM000015330743 AANUSAM000015330743 0.6289334     angus     angus
## 183 UMCUSAU000000194621 JERUSAM000000650436 0.6281390    jersey    jersey
## 184 JERUSAM000000650436 UMCUSAU000000194621 0.6281390    jersey    jersey
## 185 UMCUSAU000000194768 UMCUSAU000000194768 0.6277001 charolais charolais
## 186 AANNZLM001217000784 AANNZLM001217000784 0.6262946     angus     angus
## 187 UMCUSAU000000194792 UMCUSAU000000194792 0.6260797 simmental simmental
## 188 UMCUSAU000000194627 UMCUSAU000000194627 0.6260069    jersey    jersey
## 189 UMCUSAU000000198081 UMCUSAU000000198081 0.6259564 simmental simmental
## 190 UMCUSAU000000194635 UMCUSAU000000194635 0.6245757    jersey    jersey
## 191 HOLUSAM003128590796 HOLUSAM003128590796 0.6244550  holstein  holstein
## 192 HOLUSAM003129037765 HOLUSAM003129037765 0.6240283  holstein  holstein
## 193 SIMUSAM000002002092 SIMUSAM000002002092 0.6229744 simmental simmental
## 194 UMCUSAU000000194848 UMCUSAU000000194848 0.6227866  holstein  holstein
## 195 UMCUSAU000000194668 UMCUSAU000000194668 0.6219642    jersey    jersey
## 196 UMCUSAU000000194800 UMCUSAU000000194800 0.6217710 simmental simmental
## 197 UMCUSAU000000194630 UMCUSAU000000194630 0.6213933    jersey    jersey
## 198 UMCUSAM000000198537 UMCUSAM000000198537 0.6213582     angus     angus
## 199 SIMDEUM000929276244 SIMDEUM000929276244 0.6213490 simmental simmental
## 200 CHAUSAM00000M246564 CHAUSAM00000M246564 0.6205132 charolais charolais
  • Excluding diagonal
# Largest off-diagonal GRM values: brahman animals are removed on both sides
# of the pair and self-comparisons are dropped. top_n() keeps ties, so the
# printed table can hold more than 200 rows.
big_grm_long %>%
  filter(ind1_pop != "brahman") %>%
  filter(ind2_pop != "brahman") %>%
  filter(ind1 != ind2) %>%
  top_n(n = 200, wt = value) %>%
  arrange(desc(value))
## filter: removed 62340 rows (6%) 
## filter: removed 55680 rows (6%) 
## filter: removed 928 rows (<1%)
##                    ind1                ind2     value  ind1_pop  ind2_pop
## 1   CHAUSAM000VPM122718 CHAUSAM00000M225504 1.2679501 charolais charolais
## 2   CHAUSAM00000M225504 CHAUSAM000VPM122718 1.2679501 charolais charolais
## 3   CHAUSAM000VPM122718 CHAUSAM00000M319454 1.1819029 charolais charolais
## 4   CHAUSAM00000M319454 CHAUSAM000VPM122718 1.1819029 charolais charolais
## 5   CHAUSAM00000M319454 CHAUSAM00000M225504 0.9378336 charolais charolais
## 6   CHAUSAM00000M225504 CHAUSAM00000M319454 0.9378336 charolais charolais
## 7   CHAUSAM000VPM122718 CHAUSAM00000M234430 0.7694200 charolais charolais
## 8   CHAUSAM00000M234430 CHAUSAM000VPM122718 0.7694200 charolais charolais
## 9   UMCUSAU000000194623 JERUSAM000000652501 0.7091562    jersey    jersey
## 10  JERUSAM000000652501 UMCUSAU000000194623 0.7091562    jersey    jersey
## 11  UMCUSAU000000194718 UMCUSAU000000194250 0.6666666     angus     angus
## 12  UMCUSAU000000194250 UMCUSAU000000194718 0.6666666     angus     angus
## 13  UMCUSAU000000194266 UMCUSAM000000198521 0.6518054     angus     angus
## 14  UMCUSAM000000198521 UMCUSAU000000194266 0.6518054     angus     angus
## 15  UMCUSAU000000194613 JERAUSM000A00000810 0.6477522    jersey    jersey
## 16  JERAUSM000A00000810 UMCUSAU000000194613 0.6477522    jersey    jersey
## 17  UMCUSAU000000194185 HOLFRAM004493050102 0.6456350  holstein  holstein
## 18  HOLFRAM004493050102 UMCUSAU000000194185 0.6456350  holstein  holstein
## 19  UMCUSAU000000194616 JERAUSM000A00008529 0.6443452    jersey    jersey
## 20  JERAUSM000A00008529 UMCUSAU000000194616 0.6443452    jersey    jersey
## 21  UMCUSAU000000194617 JERAUSM000A00011730 0.6384078    jersey    jersey
## 22  JERAUSM000A00011730 UMCUSAU000000194617 0.6384078    jersey    jersey
## 23  UMCUSAU000000194621 JERUSAM000000650436 0.6281390    jersey    jersey
## 24  JERUSAM000000650436 UMCUSAU000000194621 0.6281390    jersey    jersey
## 25  SIMUSAM000001907954 SIMUSAM000001716917 0.6161832 simmental simmental
## 26  SIMUSAM000001716917 SIMUSAM000001907954 0.6161832 simmental simmental
## 27  HOLCANM000109538236 HOL840M003131131453 0.6147859  holstein  holstein
## 28  HOL840M003131131453 HOLCANM000109538236 0.6147859  holstein  holstein
## 29  UMCUSAU000000194614 JERAUSM000A00001716 0.6137056    jersey    jersey
## 30  JERAUSM000A00001716 UMCUSAU000000194614 0.6137056    jersey    jersey
## 31  HOLUSAM003128824393 HOLCANM000109538236 0.6122493  holstein  holstein
## 32  HOLCANM000109538236 HOLUSAM003128824393 0.6122493  holstein  holstein
## 33  UMCUSAU000000194611 JERAUSM000A00000734 0.6101062    jersey    jersey
## 34  JERAUSM000A00000734 UMCUSAU000000194611 0.6101062    jersey    jersey
## 35  UMCUSAM000000196852 UMCUSAM000000196809 0.5953712  holstein  holstein
## 36  UMCUSAM000000196809 UMCUSAM000000196852 0.5953712  holstein  holstein
## 37  UMCUSAU000000194720 UMCUSAU000000194252 0.5923813     angus     angus
## 38  UMCUSAU000000194252 UMCUSAU000000194720 0.5923813     angus     angus
## 39  UMCUSAM000000196851 UMCUSAM000000196773 0.5895674  holstein  holstein
## 40  UMCUSAM000000196773 UMCUSAM000000196851 0.5895674  holstein  holstein
## 41  UMCUSAU000000194612 JERAUSM000A00000747 0.5881447    jersey    jersey
## 42  JERAUSM000A00000747 UMCUSAU000000194612 0.5881447    jersey    jersey
## 43  UMCUSAM000000198519 JERAUSM000A00010153 0.5872306    jersey    jersey
## 44  JERAUSM000A00010153 UMCUSAM000000198519 0.5872306    jersey    jersey
## 45  UMCUSAU000000194370 UMCUSAU000000194368 0.5843120 charolais charolais
## 46  UMCUSAU000000194368 UMCUSAU000000194370 0.5843120 charolais charolais
## 47  UMCUSAU000000194619 JERUSAM000000646877 0.5825573    jersey    jersey
## 48  JERUSAM000000646877 UMCUSAU000000194619 0.5825573    jersey    jersey
## 49  HOLUSAM000071451889 HOLUSAM000070626136 0.5810284  holstein  holstein
## 50  HOLUSAM000070626136 HOLUSAM000071451889 0.5810284  holstein  holstein
## 51  UMCUSAU000000194727 UMCUSAU000000194259 0.5807513     angus     angus
## 52  UMCUSAU000000194259 UMCUSAU000000194727 0.5807513     angus     angus
## 53  UMCUSAU000000194657 UMCUSAU000000194623 0.5780375    jersey    jersey
## 54  UMCUSAU000000194623 UMCUSAU000000194657 0.5780375    jersey    jersey
## 55  UMCUSAU000000194657 JERUSAM000000652501 0.5755013    jersey    jersey
## 56  JERUSAM000000652501 UMCUSAU000000194657 0.5755013    jersey    jersey
## 57  HOLUSAM003131131371 HOL840M003131131371 0.5752966  holstein  holstein
## 58  HOL840M003131131371 HOLUSAM003131131371 0.5752966  holstein  holstein
## 59  UMCUSAU000000194615 JERAUSM000A00003096 0.5705881    jersey    jersey
## 60  JERAUSM000A00003096 UMCUSAU000000194615 0.5705881    jersey    jersey
## 61  UMCUSAU000000194715 UMCUSAU000000194247 0.5698926     angus     angus
## 62  UMCUSAU000000194247 UMCUSAU000000194715 0.5698926     angus     angus
## 63  UMCUSAU000000194731 UMCUSAU000000194264 0.5686411     angus     angus
## 64  UMCUSAU000000194264 UMCUSAU000000194731 0.5686411     angus     angus
## 65  HOLUSAM003125201927 HOLCANM000109538236 0.5670664  holstein  holstein
## 66  HOLCANM000109538236 HOLUSAM003125201927 0.5670664  holstein  holstein
## 67  UMCUSAU000000194620 JERUSAM000000649797 0.5647312    jersey    jersey
## 68  JERUSAM000000649797 UMCUSAU000000194620 0.5647312    jersey    jersey
## 69  UMCUSAU000000194624 JERUSAM000000665185 0.5646840    jersey    jersey
## 70  JERUSAM000000665185 UMCUSAU000000194624 0.5646840    jersey    jersey
## 71  UMCUSAM000000196791 UMCUSAM000000196782 0.5638757  holstein  holstein
## 72  UMCUSAM000000196782 UMCUSAM000000196791 0.5638757  holstein  holstein
## 73  UMCUSAU000000194618 JERUSAM000000644410 0.5604355    jersey    jersey
## 74  JERUSAM000000644410 UMCUSAU000000194618 0.5604355    jersey    jersey
## 75  HOLUSAM003009329221 HOL840M003131131453 0.5558447  holstein  holstein
## 76  HOL840M003131131453 HOLUSAM003009329221 0.5558447  holstein  holstein
## 77  AANUSAM000007501542 AANUSAM000005221298 0.5554328     angus     angus
## 78  AANUSAM000005221298 AANUSAM000007501542 0.5554328     angus     angus
## 79  UMCUSAU000000194560 HOLAUSM000A00009559 0.5543284  holstein  holstein
## 80  HOLAUSM000A00009559 UMCUSAU000000194560 0.5543284  holstein  holstein
## 81  UMCUSAU000000194622 JERUSAM000000651268 0.5540344    jersey    jersey
## 82  JERUSAM000000651268 UMCUSAU000000194622 0.5540344    jersey    jersey
## 83  UMCUSAU000000194726 UMCUSAU000000194258 0.5520806     angus     angus
## 84  UMCUSAU000000194258 UMCUSAU000000194726 0.5520806     angus     angus
## 85  UMCUSAM000000199730 UMCUSAF000000199724 0.5520187  hereford  hereford
## 86  UMCUSAF000000199724 UMCUSAM000000199730 0.5520187  hereford  hereford
## 87  UMCUSAU000000194702 UMCUSAU000000194234 0.5447748     angus     angus
## 88  UMCUSAU000000194234 UMCUSAU000000194702 0.5447748     angus     angus
## 89  UMCUSAU000000194723 UMCUSAU000000194255 0.5441425     angus     angus
## 90  UMCUSAU000000194255 UMCUSAU000000194723 0.5441425     angus     angus
## 91  HOLUSAM003125201927 HOL840M003131131453 0.5431097  holstein  holstein
## 92  HOL840M003131131453 HOLUSAM003125201927 0.5431097  holstein  holstein
## 93  HOLUSAM003128824393 HOL840M003131131453 0.5417542  holstein  holstein
## 94  HOL840M003131131453 HOLUSAM003128824393 0.5417542  holstein  holstein
## 95  UMCUSAU000000194728 UMCUSAU000000194261 0.5406755     angus     angus
## 96  UMCUSAU000000194261 UMCUSAU000000194728 0.5406755     angus     angus
## 97  UMCUSAF000000199727 UMCUSAF000000199724 0.5366674  hereford  hereford
## 98  UMCUSAF000000199724 UMCUSAF000000199727 0.5366674  hereford  hereford
## 99  UMCUSAU000000194182 HOLFRAM002998012650 0.5365281  holstein  holstein
## 100 HOLFRAM002998012650 UMCUSAU000000194182 0.5365281  holstein  holstein
## 101 UMCUSAU000000194697 UMCUSAU000000194229 0.5356164     angus     angus
## 102 UMCUSAU000000194229 UMCUSAU000000194697 0.5356164     angus     angus
## 103 UMCUSAU000000194260 UMCUSAM000000198520 0.5354116     angus     angus
## 104 UMCUSAM000000198520 UMCUSAU000000194260 0.5354116     angus     angus
## 105 UMCUSAU000000194696 UMCUSAU000000194228 0.5341569     angus     angus
## 106 UMCUSAU000000194228 UMCUSAU000000194696 0.5341569     angus     angus
## 107 UMCUSAU000000194733 UMCUSAU000000194265 0.5334909     angus     angus
## 108 UMCUSAU000000194265 UMCUSAU000000194733 0.5334909     angus     angus
## 109 UMCUSAM000000196770 HOLUSAM003126477819 0.5334516  holstein  holstein
## 110 HOLUSAM003126477819 UMCUSAM000000196770 0.5334516  holstein  holstein
## 111 UMCUSAU000000194716 UMCUSAU000000194248 0.5293155     angus     angus
## 112 UMCUSAU000000194248 UMCUSAU000000194716 0.5293155     angus     angus
## 113 UMCUSAU000000194782 UMCUSAU000000194796 0.5278592  hereford  limousin
## 114 UMCUSAU000000194573 HOLCANM000000352790 0.5233847  holstein  holstein
## 115 HOLCANM000000352790 UMCUSAU000000194573 0.5233847  holstein  holstein
## 116 UMCUSAM000000196786 HOLUSAM003012560018 0.5207687  holstein  holstein
## 117 HOLUSAM003012560018 UMCUSAM000000196786 0.5207687  holstein  holstein
## 118 UMCUSAU000000194567 HOLAUSM000H01059976 0.5202426  holstein  holstein
## 119 HOLAUSM000H01059976 UMCUSAU000000194567 0.5202426  holstein  holstein
## 120 UMCUSAU000000194558 HOLAUSM000A00009209 0.5192653  holstein  holstein
## 121 HOLAUSM000A00009209 UMCUSAU000000194558 0.5192653  holstein  holstein
## 122 UMCUSAM000000196847 HOLUSAM000002070579 0.5192495  holstein  holstein
## 123 HOLUSAM000002070579 UMCUSAM000000196847 0.5192495  holstein  holstein
## 124 UMCUSAU000000194713 UMCUSAU000000194245 0.5188542     angus     angus
## 125 UMCUSAU000000194245 UMCUSAU000000194713 0.5188542     angus     angus
## 126 UMCUSAU000000194671 UMCUSAU000000194657 0.5179075    jersey    jersey
## 127 UMCUSAU000000194657 UMCUSAU000000194671 0.5179075    jersey    jersey
## 128 HOLUSAM003009329221 HOLCANM000109538236 0.5173219  holstein  holstein
## 129 HOLCANM000109538236 HOLUSAM003009329221 0.5173219  holstein  holstein
## 130 UMCUSAU000000194721 UMCUSAU000000194253 0.5159907     angus     angus
## 131 UMCUSAU000000194253 UMCUSAU000000194721 0.5159907     angus     angus
## 132 HOLUSAM003131131371 HOLCANM000109538236 0.5150506  holstein  holstein
## 133 HOLCANM000109538236 HOLUSAM003131131371 0.5150506  holstein  holstein
## 134 HOLCANM000109538236 HOL840M003131131371 0.5131684  holstein  holstein
## 135 HOL840M003131131371 HOLCANM000109538236 0.5131684  holstein  holstein
## 136 SIMUSAM000001937373 RANUSAM000003358146 0.5105202 simmental simmental
## 137 RANUSAM000003358146 SIMUSAM000001937373 0.5105202 simmental simmental
## 138 UMCUSAU000000194657 UMCUSAU000000194654 0.5055063    jersey    jersey
## 139 UMCUSAU000000194654 UMCUSAU000000194657 0.5055063    jersey    jersey
## 140 UMCUSAU000000194273 HERUSAF000042190680 0.5015844  hereford  hereford
## 141 HERUSAF000042190680 UMCUSAU000000194273 0.5015844  hereford  hereford
## 142 UMCUSAU000000194573 UMCUSAM000000196784 0.5014160  holstein  holstein
## 143 UMCUSAM000000196784 UMCUSAU000000194573 0.5014160  holstein  holstein
## 144 UMCUSAU000000194709 UMCUSAU000000194241 0.5003591     angus     angus
## 145 UMCUSAU000000194241 UMCUSAU000000194709 0.5003591     angus     angus
## 146 HOLUSAM000072495715 HOLUSAM000070626136 0.4974645  holstein  holstein
## 147 HOLUSAM000070626136 HOLUSAM000072495715 0.4974645  holstein  holstein
## 148 UMCUSAM000000196831 UMCUSAM000000196786 0.4971759  holstein  holstein
## 149 UMCUSAM000000196786 UMCUSAM000000196831 0.4971759  holstein  holstein
## 150 HOLUSAM000072495715 HOLUSAM000071451889 0.4958166  holstein  holstein
## 151 HOLUSAM000071451889 HOLUSAM000072495715 0.4958166  holstein  holstein
## 152 UMCUSAU000000194180 HOLFRAM002296001756 0.4939476  holstein  holstein
## 153 HOLFRAM002296001756 UMCUSAU000000194180 0.4939476  holstein  holstein
## 154 UMCUSAU000000194657 UMCUSAU000000194619 0.4925979    jersey    jersey
## 155 UMCUSAU000000194619 UMCUSAU000000194657 0.4925979    jersey    jersey
## 156 UMCUSAU000000194725 UMCUSAU000000194257 0.4925138     angus     angus
## 157 UMCUSAU000000194257 UMCUSAU000000194725 0.4925138     angus     angus
## 158 UMCUSAU000000194729 UMCUSAU000000194262 0.4912487     angus     angus
## 159 UMCUSAU000000194262 UMCUSAU000000194729 0.4912487     angus     angus
## 160 UMCUSAU000000194688 UMCUSAU000000194220 0.4904374     angus     angus
## 161 UMCUSAU000000194220 UMCUSAU000000194688 0.4904374     angus     angus
## 162 UMCUSAU000000194657 JER260M003126073776 0.4890576    jersey    jersey
## 163 JER260M003126073776 UMCUSAU000000194657 0.4890576    jersey    jersey
## 164 UMCUSAU000000194657 JERUSAM000000646877 0.4883554    jersey    jersey
## 165 JERUSAM000000646877 UMCUSAU000000194657 0.4883554    jersey    jersey
## 166 CHAUSAM000VPM122718 CHACANM0000RMC30160 0.4879357 charolais charolais
## 167 CHACANM0000RMC30160 CHAUSAM000VPM122718 0.4879357 charolais charolais
## 168 UMCUSAM000000196831 HOLUSAM003012560018 0.4871702  holstein  holstein
## 169 HOLUSAM003012560018 UMCUSAM000000196831 0.4871702  holstein  holstein
## 170 UMCUSAU000000194649 UMCUSAM000000033675 0.4867226    jersey    jersey
## 171 UMCUSAM000000033675 UMCUSAU000000194649 0.4867226    jersey    jersey
## 172 UMCUSAU000000194699 UMCUSAU000000194231 0.4858802     angus     angus
## 173 UMCUSAU000000194231 UMCUSAU000000194699 0.4858802     angus     angus
## 174 UMCUSAU000000194657 UMCUSAU000000194646 0.4846924    jersey    jersey
## 175 UMCUSAU000000194646 UMCUSAU000000194657 0.4846924    jersey    jersey
## 176 UMCUSAU000000194698 UMCUSAU000000194230 0.4844871     angus     angus
## 177 UMCUSAU000000194230 UMCUSAU000000194698 0.4844871     angus     angus
## 178 UMCUSAM000000196846 HOLUSAM000138122625 0.4803394  holstein  holstein
## 179 HOLUSAM000138122625 UMCUSAM000000196846 0.4803394  holstein  holstein
## 180 HOL840M003131131453 HOL840M003130854065 0.4793351  holstein  holstein
## 181 HOL840M003130854065 HOL840M003131131453 0.4793351  holstein  holstein
## 182 UMCUSAM000000196786 HOLUSAM000071451889 0.4783347  holstein  holstein
## 183 HOLUSAM000071451889 UMCUSAM000000196786 0.4783347  holstein  holstein
## 184 UMCUSAU000000194672 UMCUSAU000000194668 0.4772177    jersey    jersey
## 185 UMCUSAU000000194668 UMCUSAU000000194672 0.4772177    jersey    jersey
## 186 UMCUSAM000000196838 HOLUSAM003126477819 0.4751568  holstein  holstein
## 187 HOLUSAM003126477819 UMCUSAM000000196838 0.4751568  holstein  holstein
## 188 UMCUSAU000000194712 UMCUSAU000000194244 0.4746871     angus     angus
## 189 UMCUSAU000000194244 UMCUSAU000000194712 0.4746871     angus     angus
## 190 UMCUSAM000000196812 HOLUSAM000139383375 0.4707756  holstein  holstein
## 191 HOLUSAM000139383375 UMCUSAM000000196812 0.4707756  holstein  holstein
## 192 HOLUSAM003131131371 HOLUSAM003125201927 0.4705759  holstein  holstein
## 193 HOLUSAM003125201927 HOLUSAM003131131371 0.4705759  holstein  holstein
## 194 UMCUSAU000000194643 UMCUSAU000000194637 0.4705502    jersey    jersey
## 195 UMCUSAU000000194637 UMCUSAU000000194643 0.4705502    jersey    jersey
## 196 HOLCANM000109538236 HOL840M003130854065 0.4705488  holstein  holstein
## 197 HOL840M003130854065 HOLCANM000109538236 0.4705488  holstein  holstein
## 198 SIMUSAM000002144976 RANUSAM000003358146 0.4698128 simmental simmental
## 199 RANUSAM000003358146 SIMUSAM000002144976 0.4698128 simmental simmental
## 200 UMCUSAU000000194341 UMCUSAU000000194340 0.4691180    jersey    jersey
## 201 UMCUSAU000000194340 UMCUSAU000000194341 0.4691180    jersey    jersey

Removal

Charolais

# Candidate pool: each charolais ID found in the multi-breed GRM, annotated
# from `approved`, restricted to the 75 highest avg_coverage values
# (top_n() keeps ties) and sorted best-covered first.
keep <-
  big_grm_long %>%
  filter(ind1_pop == "charolais", ind2_pop == "charolais") %>%
  distinct(ind1) %>%
  left_join(approved, by = c(ind1 = "international_id")) %>%
  top_n(n = 75, wt = avg_coverage) %>%
  arrange(desc(avg_coverage))
## filter: removed 1018963 rows (99%) 
## distinct: removed 7482 rows (99%) 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
# IDs dropped from the pool. The selection rule is not visible here
# (presumably the high-relatedness pairs listed above -- confirm).
out <- c(
  "CHAUSAM00000M225504", "CHAUSAM00000M319454", "CHAUSAM00000M234430",
  "UMCUSAU000000194368", "CHACANM0000RMC30160", "UMCUSAM000000092750",
  "CHAFRAM005882101816", "CHAUSAM00000M314744", "UMCUSAU000000194741",
  "UMCUSAU000000194786", "CHAFRAM005872122876", "CHAUSAM00000M434790",
  "UMCUSAU000000194843", "CHACANM0000MC236083", "CHACANM0000MC293022"
)

# Final charolais cohort: one row per retained ID, tagged with its breed.
cha <-
  big_grm_long %>%
  # both members of the pair must be in the candidate pool
  filter(ind1 %in% keep$ind1, ind2 %in% keep$ind1) %>%
  # no self-comparisons
  filter(ind1 != ind2) %>%
  # filter(value > 0.12) %>%
  filter(!(ind1 %in% out)) %>%
  filter(!(ind2 %in% out)) %>%
  left_join(approved, by = c(ind1 = "international_id")) %>%
  select(ind1:value, avg_coverage, everything()) %>%
  # sorting only determines the row order of the distinct IDs below
  arrange(desc(value), desc(avg_coverage)) %>%
  distinct(ind1) %>%
  mutate(pop = "charolais")
## filter: removed 1020907 rows (99%) 
## filter: removed 75 rows (1%) 
## filter: removed 1110 rows (20%) 
## filter: removed 900 rows (20%) 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## distinct: removed 3480 rows (98%) 
## mutate: new variable 'pop' with one unique value and 0% NA

Holstein

# Candidate pool: each holstein ID found in the multi-breed GRM, annotated
# from `approved`, restricted to the 75 highest avg_coverage values
# (top_n() keeps ties) and sorted best-covered first.
# NOTE(review): the knit log below shows the join adding 3 rows, so some IDs
# match more than one `approved` record and the 75 rows can repeat an ID.
keep <-
  big_grm_long %>%
  filter(ind1_pop == "holstein", ind2_pop == "holstein") %>%
  distinct(ind1) %>%
  left_join(approved, by = c(ind1 = "international_id")) %>%
  top_n(n = 75, wt = avg_coverage) %>%
  arrange(desc(avg_coverage))
## filter: removed 965523 rows (94%) 
## distinct: removed 60762 rows (>99%) 
## left_join: added 3 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
# IDs dropped from the pool. The selection rule is not visible here
# (presumably the high-relatedness pairs listed above -- confirm).
out <- c(
  "UMCUSAM000000196809", "UMCUSAU000000194573", "HOLUSAM000071451889",
  "HOL840M003128043644", "HOL840M003131131453", "HOL840M003129128755",
  "UMCUSAM000000196846", "HOLUSAM000072512148", "UMCUSAM000000196798",
  "UMCUSAM000000196831", "UMCUSAM000000196813", "UMCUSAM000000196810",
  "UMCUSAM000000196795"
)

# Final holstein cohort: one row per retained ID, tagged with its breed.
hol <-
  big_grm_long %>%
  # both members of the pair must be in the candidate pool
  filter(ind1 %in% keep$ind1, ind2 %in% keep$ind1) %>%
  # no self-comparisons
  filter(ind1 != ind2) %>%
  # filter(value > 0.12) %>%
  filter(!(ind1 %in% out)) %>%
  filter(!(ind2 %in% out)) %>%
  left_join(approved, by = c(ind1 = "international_id")) %>%
  select(ind1:value, avg_coverage, everything()) %>%
  # sorting only determines the row order of the distinct IDs below
  arrange(desc(value), desc(avg_coverage)) %>%
  distinct(ind1) %>%
  mutate(pop = "holstein")
## filter: removed 1021203 rows (99%) 
## filter: removed 73 rows (1%) 
## filter: removed 936 rows (18%) 
## filter: removed 780 rows (18%) 
## left_join: added 118 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## distinct: removed 3598 rows (98%) 
## mutate: new variable 'pop' with one unique value and 0% NA

Angus

# Candidate pool: each angus ID found in the multi-breed GRM, annotated from
# `approved`, restricted to the 75 highest avg_coverage values (top_n() keeps
# ties) and sorted best-covered first.
keep <-
  big_grm_long %>%
  filter(ind1_pop == "angus", ind2_pop == "angus") %>%
  distinct(ind1) %>%
  left_join(approved, by = c(ind1 = "international_id")) %>%
  top_n(n = 75, wt = avg_coverage) %>%
  arrange(desc(avg_coverage))
## filter: removed 952003 rows (93%) 
## distinct: removed 74256 rows (>99%) 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
# IDs dropped from the pool. The selection rule is not visible here
# (presumably the high-relatedness pairs listed above -- confirm).
out <- c(
  "AANUSAM000016447771", "AANUSAM000015330743", "AANUSAM000013009379",
  "AANUSAM000013544928", "AANUSAM000012452829", "AANUSAM000010239760",
  "AANUSAM000015899735", "AANUSAM000014844711", "AANUSAM000011160685",
  "AANUSAM000008505294", "AANUSAM000010848986", "UMCUSAF000000118765",
  "AANUSAM000014056739", "AANUSAM000011105489", "AANUSAM000007187001"
)

# Final angus cohort: one row per retained ID, tagged with its breed.
an <-
  big_grm_long %>%
  # both members of the pair must be in the candidate pool
  filter(ind1 %in% keep$ind1, ind2 %in% keep$ind1) %>%
  # no self-comparisons
  filter(ind1 != ind2) %>%
  # filter(value > 0.12) %>%
  filter(!(ind1 %in% out)) %>%
  filter(!(ind2 %in% out)) %>%
  left_join(approved, by = c(ind1 = "international_id")) %>%
  select(ind1:value, avg_coverage, everything()) %>%
  # sorting only determines the row order of the distinct IDs below
  arrange(desc(value), desc(avg_coverage)) %>%
  distinct(ind1) %>%
  mutate(pop = "angus")
## filter: removed 1020907 rows (99%) 
## filter: removed 75 rows (1%) 
## filter: removed 1110 rows (20%) 
## filter: removed 900 rows (20%) 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## distinct: removed 3480 rows (98%) 
## mutate: new variable 'pop' with one unique value and 0% NA

Hereford

# Candidate pool: each hereford ID found in the multi-breed GRM, annotated
# from `approved`, restricted to the 75 highest avg_coverage values
# (top_n() keeps ties) and sorted best-covered first.
keep <-
  big_grm_long %>%
  filter(ind1_pop == "hereford", ind2_pop == "hereford") %>%
  distinct(ind1) %>%
  left_join(approved, by = c(ind1 = "international_id")) %>%
  top_n(n = 75, wt = avg_coverage) %>%
  arrange(desc(avg_coverage))
## filter: removed 1018251 rows (99%) 
## distinct: removed 8190 rows (99%) 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
# IDs dropped from the pool. The selection rule is not visible here
# (presumably the high-relatedness pairs listed above -- confirm).
out <- c(
  "UMCUSAF000000199724", "UMCUSAF000000199737", "HERCANM000C02698670",
  "HERUSAM000042361822", "UMCUSAF000000199727", "HERCANM000C02738219",
  "HERUSAM000041113279", "UMCUSAM000000199725", "UMCUSAF000000199728",
  "UMCUSAF000000199720", "HERCANM000C02789138", "UMCUSAU000000194350",
  "HERUSAM000042593689", "UMCUSAF000000199734", "HERCANM000C02020446"
)

# Final hereford cohort: one row per retained ID, tagged with its breed.
hfd <-
  big_grm_long %>%
  # both members of the pair must be in the candidate pool
  filter(ind1 %in% keep$ind1, ind2 %in% keep$ind1) %>%
  # no self-comparisons
  filter(ind1 != ind2) %>%
  # filter(value > 0.12) %>%
  filter(!(ind1 %in% out)) %>%
  filter(!(ind2 %in% out)) %>%
  left_join(approved, by = c(ind1 = "international_id")) %>%
  select(ind1:value, avg_coverage, everything()) %>%
  # sorting only determines the row order of the distinct IDs below
  arrange(desc(value), desc(avg_coverage)) %>%
  distinct(ind1) %>%
  mutate(pop = "hereford")
## filter: removed 1020907 rows (99%) 
## filter: removed 75 rows (1%) 
## filter: removed 1110 rows (20%) 
## filter: removed 900 rows (20%) 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## distinct: removed 3480 rows (98%) 
## mutate: new variable 'pop' with one unique value and 0% NA

Simmental

# Candidate pool: each simmental ID found in the multi-breed GRM, annotated
# from `approved`, restricted to the 75 highest avg_coverage values
# (top_n() keeps ties) and sorted best-covered first.
# NOTE(review): the knit log below shows the join adding one row, so an ID
# matches more than one `approved` record and the 75 rows can repeat an ID.
keep <-
  big_grm_long %>%
  filter(ind1_pop == "simmental", ind2_pop == "simmental") %>%
  distinct(ind1) %>%
  left_join(approved, by = c(ind1 = "international_id")) %>%
  top_n(n = 75, wt = avg_coverage) %>%
  arrange(desc(avg_coverage))
## filter: removed 1009108 rows (98%) 
## distinct: removed 17292 rows (99%) 
## left_join: added one row and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
# IDs dropped from the pool. The selection rule is not visible here
# (presumably the high-relatedness pairs listed above -- confirm).
out <- c(
  "SIMUSAM000001907954", "RANUSAM000003358146", "SIMUSAM000002068996",
  "SIMUSAM000002240471", "SIMCANM000000287068", "SIMUSAM000002144976",
  "UMCUSAU000000198090", "SIMCANM000000172281", "SIMUSAM000002081939",
  "SIMUSAM000002002092", "UMCUSAU000000194365", "SIMUSAM000001282876",
  "SIMUSAM000001716917", "UMCUSAU000000198091"
)

# Final simmental cohort: one row per retained ID, tagged with its breed.
sim <-
  big_grm_long %>%
  # both members of the pair must be in the candidate pool
  filter(ind1 %in% keep$ind1, ind2 %in% keep$ind1) %>%
  # no self-comparisons
  filter(ind1 != ind2) %>%
  # filter(value > 0.12) %>%
  filter(!(ind1 %in% out)) %>%
  filter(!(ind2 %in% out)) %>%
  left_join(approved, by = c(ind1 = "international_id")) %>%
  select(ind1:value, avg_coverage, everything()) %>%
  # sorting only determines the row order of the distinct IDs below
  arrange(desc(value), desc(avg_coverage)) %>%
  distinct(ind1) %>%
  mutate(pop = "simmental")
## filter: removed 1021056 rows (99%) 
## filter: removed 74 rows (1%) 
## filter: removed 1022 rows (19%) 
## filter: removed 840 rows (19%) 
## left_join: added 59 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## distinct: removed 3539 rows (98%) 
## mutate: new variable 'pop' with one unique value and 0% NA

Jersey

# Candidate pool: each jersey ID found in the multi-breed GRM, annotated from
# `approved`, restricted to the 75 highest avg_coverage values (top_n() keeps
# ties) and sorted best-covered first.
keep <-
  big_grm_long %>%
  filter(ind1_pop == "jersey", ind2_pop == "jersey") %>%
  distinct(ind1) %>%
  left_join(approved, by = c(ind1 = "international_id")) %>%
  top_n(n = 75, wt = avg_coverage) %>%
  arrange(desc(avg_coverage))
## filter: removed 1016928 rows (99%) 
## distinct: removed 9506 rows (99%) 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …)
# IDs dropped from the pool. The selection rule is not visible here
# (presumably the high-relatedness pairs listed above -- confirm).
out <- c(
  "JERUSAM000000652501", "JERUSAM000000646877", "UMCUSAU000000194623",
  "JERAUSM000A00003096", "JERUSAM000000649797", "JERUSAM000000665185",
  "JERUSAM000000651268", "UMCUSAU000000194671", "UMCUSAU000000194657",
  "UMCUSAU000000194672", "UMCUSAU000000194637", "UMCUSAU000000194341",
  "UMCUSAU000000194335", "UMCUSAU000000194662", "UMCUSAU000000194652"
)

# Final jersey cohort: one row per retained ID, tagged with its breed.
jer <-
  big_grm_long %>%
  # both members of the pair must be in the candidate pool
  filter(ind1 %in% keep$ind1, ind2 %in% keep$ind1) %>%
  # no self-comparisons
  filter(ind1 != ind2) %>%
  # filter(value > 0.12) %>%
  filter(!(ind1 %in% out)) %>%
  filter(!(ind2 %in% out)) %>%
  left_join(approved, by = c(ind1 = "international_id")) %>%
  select(ind1:value, avg_coverage, everything()) %>%
  # sorting only determines the row order of the distinct IDs below
  arrange(desc(value), desc(avg_coverage)) %>%
  distinct(ind1) %>%
  mutate(pop = "jersey")
## filter: removed 1020907 rows (99%) 
## filter: removed 75 rows (1%) 
## filter: removed 1110 rows (20%) 
## filter: removed 900 rows (20%) 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## distinct: removed 3480 rows (98%) 
## mutate: new variable 'pop' with one unique value and 0% NA

Re-evaluate post-pruning

# Stack the six per-breed cohorts into one table of retained IDs.
grm_prune <- bind_rows(an, cha, hol, hfd, jer, sim)

# Multi-breed GRM restricted to pairs in which both animals were retained.
big_grm_prune <-
  big_grm_long %>%
  filter(ind1 %in% grm_prune[["ind1"]]) %>%
  filter(ind2 %in% grm_prune[["ind1"]])
## filter: removed 652492 rows (64%) 
## filter: removed 244440 rows (65%)
# Heatmap of every pairwise GRM value among the retained animals; axis text
# and ticks are hidden because there is one tile per pair of IDs.
ggplot(big_grm_prune, aes(x = ind1, y = ind2, fill = value)) +
  geom_tile() +
  viridis::scale_fill_viridis(option = "inferno", direction = -1) +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  ) +
  labs(
    x = "Individual 1",
    y = "Individual 2",
    title = "Pairwise genomic relatedness post-pruning: multi-breed (all individuals)"
  )

# No plot is passed, so ggsave() writes the most recently displayed plot.
ggsave(here::here("figures/sample_selection/big_grm_pruned.png"))
## Saving 7 x 5 in image
# plot_big_grm_prune() is called once per breed for its side effects only (the
# knit log shows it saving an image and returning NULL), so use walk() rather
# than map(): the calls are the same, but no list of NULLs is printed.
walk(
  c("holstein", "angus", "simmental", "jersey", "hereford", "charolais"),
  plot_big_grm_prune
)
## filter: removed 108000 rows (83%) 
## filter: removed 18000 rows (83%)
## Saving 7 x 5 in image
## filter: removed 108000 rows (83%) 
## filter: removed 18000 rows (83%)
## Saving 7 x 5 in image
## filter: removed 108000 rows (83%) 
## filter: removed 18000 rows (83%)
## Saving 7 x 5 in image
## filter: removed 108000 rows (83%) 
## filter: removed 18000 rows (83%)
## Saving 7 x 5 in image
## filter: removed 108000 rows (83%) 
## filter: removed 18000 rows (83%)
## Saving 7 x 5 in image
## filter: removed 108000 rows (83%) 
## filter: removed 18000 rows (83%)
## Saving 7 x 5 in image
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL

Plot distribution of post-pruning GRM values

# Ridge plot of within-breed, off-diagonal GRM values after pruning, one ridge
# per breed ordered by mean value, with a vertical line at the overall mean.
big_grm_prune %>% 
  filter(ind1_pop == ind2_pop) %>% 
  filter(ind1 != ind2) %>% 
  mutate(ind1_pop = str_to_title(ind1_pop)) %>% 
  ggplot(aes(x = value, 
             y = forcats::fct_reorder(ind1_pop, value, mean), 
             #fill = forcats::fct_reorder(ind1_pop, value, mean, .desc = TRUE))) +
             fill = ind1_pop)) +
  ggridges::geom_density_ridges(alpha = 0.5) +
  # "none" hides the fill legend; the logical form (fill = FALSE) is deprecated
  guides(fill = "none") +
  geom_vline(aes(xintercept = mean(value))) +
  labs(x = "GRM value", y = "Kernel density", title = str_wrap("Density of pairwise genomic relatedness by breed post-pruning", width = 50))
## filter: removed 108000 rows (83%) 
## filter: removed 360 rows (2%) 
## mutate: changed 21240 values (100%) of 'ind1_pop' (0 new NA)
## Picking joint bandwidth of 0.00924

# Save the most recently displayed plot. cowplot no longer exports ggsave()
# (it was replaced by ggsave2()), so call ggplot2's version directly; width,
# height and dpi are given explicitly, so the written file is unchanged.
ggplot2::ggsave(
  here::here("figures/sample_selection/prune_grm_breed_density.png"),
  width = 10, height = 7, dpi = 500
)
## Picking joint bandwidth of 0.00924

  • Pruning actually increased the mean within-breed relatedness (0.0745 before vs. 0.0896 after pruning). How did that happen?
# Mean / min / max within-breed, off-diagonal relatedness before ("Pre") and
# after ("Post") pruning; brahman and limousin are left out of the baseline.
big_grm_long %>% 
  filter(ind1_pop == ind2_pop) %>% 
  filter(!ind1_pop %in% c("brahman", "limousin")) %>% 
  filter(!ind2_pop %in% c("brahman", "limousin")) %>% 
  # label the unpruned rows; without this they fall into an <NA> group
  mutate(prune = "Pre") %>% 
  bind_rows(big_grm_prune %>% 
              filter(ind1_pop == ind2_pop) %>% 
              mutate(prune = "Post")) %>%
  filter(ind1 != ind2) %>% 
  group_by(prune) %>%
  summarise(mean_rel = mean(value),
            min_rel = min(value),
            max_rel = max(value))
## filter: removed 844516 rows (82%) 
## filter: removed 3600 rows (2%) 
## filter: no rows removed 
## filter: removed 108000 rows (83%) 
## mutate: new variable 'prune' with one unique value and 0% NA 
## filter: removed 1288 rows (1%) 
## group_by: 0 groups []
## # A tibble: 2 x 4
##   prune mean_rel min_rel max_rel
##   <chr>    <dbl>   <dbl>   <dbl>
## 1 <NA>    0.0745  -0.100   1.27 
## 2 Post    0.0896  -0.100   0.400
# Overlaid densities of within-breed, off-diagonal GRM values before ("Pre")
# and after ("Post") pruning; brahman and limousin are left out of the baseline.
big_grm_long %>%
  filter(ind1_pop == ind2_pop) %>%
  filter(!(ind1_pop %in% c("brahman", "limousin"))) %>%
  filter(!(ind2_pop %in% c("brahman", "limousin"))) %>%
  mutate(prune = "Pre") %>%
  bind_rows(
    mutate(filter(big_grm_prune, ind1_pop == ind2_pop), prune = "Post")
  ) %>%
  filter(ind1 != ind2) %>%
  mutate(ind1_pop = str_to_title(ind1_pop)) %>%
  ggplot(aes(x = value, fill = prune)) +
  geom_density(alpha = 0.5) +
  labs(
    title = "Density of pairwise genomic relatedness post-pruning",
    x = "GRM value",
    y = "Kernel density",
    fill = ""
  )
## filter: removed 844516 rows (82%) 
## filter: removed 3600 rows (2%) 
## filter: no rows removed 
## mutate: new variable 'prune' with one unique value and 0% NA 
## filter: removed 108000 rows (83%) 
## mutate: new variable 'prune' with one unique value and 0% NA 
## filter: removed 1288 rows (1%) 
## mutate: changed 198728 values (100%) of 'ind1_pop' (0 new NA)

# Save the most recently displayed plot. cowplot no longer exports ggsave()
# (it was replaced by ggsave2()), so call ggplot2's version directly; width,
# height and dpi are given explicitly, so the written file is unchanged.
ggplot2::ggsave(
  here::here("figures/sample_selection/prune_grm_all_density.png"),
  width = 10, height = 7, dpi = 500
)
# Per animal and pruning stage, count the within-breed, off-diagonal
# comparisons above 0.12, then compare the Pre/Post distributions by breed.
# The chain used to stop after summarise() and resume on a line starting with
# `%>%`, which does not parse; it is now one continuous pipeline.
big_grm_long %>%
  mutate(prune = "Pre") %>% 
  bind_rows(big_grm_prune %>% 
              mutate(prune = "Post")) %>%
  filter(ind1 != ind2 & ind1_pop == ind2_pop) %>% 
  filter(!ind1_pop %in% c("brahman", "limousin")) %>% 
  filter(!ind2_pop %in% c("brahman", "limousin")) %>% 
  group_by(prune, ind1) %>%
  summarise(
    n_hi = sum(value > 0.12),
    mean_rel = mean(value),
    max_rel = max(value),
    min_rel = min(value)
  ) %>% 
  # summarise() only peels off the last grouping level; drop the rest too
  ungroup() %>% 
  left_join(breed_key, by = c("ind1" = "id")) %>% 
  ggplot(aes(x = n_hi,
             fill = prune)) +
  geom_histogram(bins = 8, alpha = 0.5) +
  facet_wrap(~ pop)

  
  
  
 # Histogram, per breed, of how many off-diagonal comparisons above 0.12 each
 # retained animal takes part in.
 big_grm_prune %>%
  filter(!ind1_pop %in% c("brahman", "limousin")) %>% 
  filter(!ind2_pop %in% c("brahman", "limousin")) %>% 
  filter(ind1 != ind2) %>% 
  group_by(ind1) %>%
  summarise(
    n_hi = sum(value > 0.12),
    mean_rel = mean(value),
    max_rel = max(value),
    min_rel = min(value)
  ) %>% 
  arrange(desc(n_hi)) %>% 
  left_join(breed_key, by = c("ind1" = "id")) %>% 
  # NOTE(review): no cov_avg column is used by the plot below; confirm this
  # join cannot duplicate rows, which would inflate the histogram counts.
  left_join(cov_avg, by = c("ind1" = "sid")) %>% 
  mutate(pop = str_to_title(pop)) %>% 
  ggplot(aes(x = n_hi,
             #fill = forcats::fct_reorder(pop, n_hi, mean, .desc = TRUE))) +
             fill = pop)) +
  # "none" hides the fill legend; the logical form (fill = FALSE) is deprecated
  guides(fill = "none") +
  geom_histogram(bins = 8) +
  facet_wrap(~ forcats::fct_reorder(pop, n_hi, mean, .desc = TRUE)) +
  labs(title = "Number of 'influential individuals' by breed", y = "Number of individuals", x = str_wrap("Number of pairwise comparisons where value > 0.12, by individual", width = 50))

# Save the most recently displayed plot. cowplot no longer exports ggsave()
# (it was replaced by ggsave2()), so call ggplot2's version directly.
ggplot2::ggsave(here::here("figures/sample_selection/big_grm_n_influential.png"), width = 10, height = 7, dpi = 500)

Defunct: By breed-specific GRM

# Stack the melt_grm() result of every breed into one table. Breeds are bound
# one at a time, starting from angus, in the same order as before.
grm_long <-
  purrr::reduce(
    c(
      "limousin", "holstein", "hereford", "simmental",
      "jersey", "charolais", "brahman"
    ),
    function(stacked, breed) bind_rows(stacked, melt_grm(breed)),
    .init = melt_grm("angus")
  )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X274 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X274)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_double(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6) 
## mutate: new variable 'pop' with one unique value and 0% NA
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X52 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X52)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_double(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6) 
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X248 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X248)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_double(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6) 
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X92 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X92)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_double(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6) 
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X133 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X133)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_double(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6) 
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X99 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X99)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_double(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6) 
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X88 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X88)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_double(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6) 
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   X61 = col_logical()
## )
## See spec(...) for full column specifications.
## select_if: dropped one variable (X61)
## Parsed with column specification:
## cols(
##   X1 = col_character(),
##   X2 = col_character(),
##   X3 = col_double(),
##   X4 = col_double(),
##   X5 = col_double(),
##   X6 = col_double()
## )
## select: dropped 5 variables (X2, X3, X4, X5, X6) 
## mutate: new variable 'pop' with one unique value and 0% NA
## Warning in bind_rows_(x, .id): binding character and factor vector,
## coercing into character vector

Plot breed specific GRM

# Call plot_grm() once per breed name, in this order; the list of return
# values is printed.
c(
  "holstein", "angus", "simmental", "jersey",
  "hereford", "charolais", "brahman", "limousin"
) %>%
  map(plot_grm)
## filter: removed 123608 rows (67%)
## Saving 7 x 5 in image
## filter: removed 110088 rows (60%)
## Saving 7 x 5 in image
## filter: removed 167193 rows (91%)
## Saving 7 x 5 in image
## filter: removed 175013 rows (95%)
## Saving 7 x 5 in image
## filter: removed 176336 rows (96%)
## Saving 7 x 5 in image
## filter: removed 177048 rows (96%)
## Saving 7 x 5 in image
## filter: removed 181017 rows (98%)
## Saving 7 x 5 in image
## filter: removed 182016 rows (99%)
## Saving 7 x 5 in image
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
  • Likely duplicate Charolais samples
  • Should probably treat lowline Angus as a separate population
# Write a spreadsheet of the 200 largest off-diagonal values in grm_long,
# with the `approved` columns joined on for ind1, sorted from highest to lowest.
writexl::write_xlsx(
  x = grm_long %>%
    left_join(approved, by = c("ind1" = "international_id")) %>%
    # drop self-comparisons
    filter(ind1 != ind2) %>%
    top_n(n = 200, wt = value) %>%
    arrange(desc(value)),
  path = here::here("data/derived_data/sample_selection/bs_top200_nodiag.xlsx")
)
## left_join: added 873 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## filter: removed 1043 rows (1%)

Defunct: Pairwise relatedness via kinship coefficient

# Read every file in the ds_relatedness2 directory into one table. Each file's
# rows are tagged with its base name (extension removed, then the
# ".ds_relatedness2" suffix stripped), stored in `source_pop`.
kin <-
  here::here("data/derived_data/sample_selection/ds_relatedness2/") %>%
  list.files(full.names = TRUE) %>%
  # name each path after its file so map_dfr() can record it in `source`
  purrr::set_names(nm = tools::file_path_sans_ext(basename(.))) %>%
  purrr::map_dfr(read_table2, .id = "source") %>%
  mutate(source = str_remove(source, "\\.ds_relatedness2")) %>%
  rename(
    source_pop = source,
    ind1 = INDV1,
    ind2 = INDV2,
    phi = RELATEDNESS_PHI
  )
## mutate: changed 184617 values (100%) of 'source' (0 new NA)

Plot kinship coefficients

# Call plot_kin() once per breed name, in this order; the list of return
# values is printed.
c(
  "holstein", "angus", "simmental", "jersey",
  "hereford", "charolais", "brahman", "limousin"
) %>%
  map(plot_kin)
## filter: removed 123608 rows (67%)
## filter: removed 110088 rows (60%)
## filter: removed 167193 rows (91%)
## filter: removed 175013 rows (95%)
## filter: removed 176336 rows (96%)
## filter: removed 177048 rows (96%)
## filter: removed 181017 rows (98%)
## filter: removed 182016 rows (99%)
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL

Plotting highly “connected” individuals vs. coverage

Confirmed that removing highly connected individuals first does not cause any coverage problems.

# Call plot_kin_cov() once per breed name, in this order; the list of return
# values is printed.
c(
  "holstein", "angus", "simmental", "jersey",
  "hereford", "charolais", "brahman", "limousin"
) %>%
  map(plot_kin_cov)
## filter: removed 123855 rows (67%) 
## distinct: removed 30381 rows (50%) 
## group_by: 0 groups [] 
## left_join: added 3 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 110361 rows (60%) 
## distinct: removed 37128 rows (50%) 
## group_by: 0 groups [] 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 167325 rows (91%) 
## distinct: removed 8646 rows (50%) 
## group_by: 0 groups [] 
## left_join: added one row and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 175111 rows (95%) 
## distinct: removed 4753 rows (50%) 
## group_by: 0 groups [] 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## Warning: Removed 1 rows containing missing values (geom_point).
## filter: removed 176427 rows (96%) 
## distinct: removed 4095 rows (50%) 
## group_by: 0 groups [] 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 177135 rows (96%) 
## distinct: removed 3741 rows (50%) 
## group_by: 0 groups [] 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 181077 rows (98%) 
## distinct: removed 1770 rows (50%) 
## group_by: 0 groups [] 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## filter: removed 182067 rows (99%) 
## distinct: removed 1275 rows (50%) 
## group_by: 0 groups [] 
## left_join: added 0 rows and added 13 columns (Genus, Species, breed.breed, Common_name, biosample, …) 
## select: dropped 10 variables (Genus, Species, breed.breed, Common_name, biosample, …)
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL

Plot distribution of kinship values

# Ridge-line densities of the pairwise kinship coefficient (phi), one ridge
# per breed. The y axis orders breeds by their mean phi; the fill factor uses
# the same ordering reversed.
kin %>%
  mutate(source_pop = str_to_title(source_pop)) %>%
  ggplot(
    mapping = aes(
      x = phi,
      y = forcats::fct_reorder(source_pop, phi, mean),
      fill = forcats::fct_reorder(source_pop, phi, mean, .desc = TRUE)
    )
  ) +
  ggridges::geom_density_ridges(alpha = 0.5) +
  # vertical reference line at the mean of phi
  geom_vline(mapping = aes(xintercept = mean(phi))) +
  labs(
    title = "Density of pairwise kinship coefficient by breed",
    x = "Kinship coefficient",
    y = "Kernel density",
    fill = "Breed"
  )
## mutate: changed 184617 values (100%) of 'source_pop' (0 new NA)
## Picking joint bandwidth of 0.0174

# Save the most recently displayed plot. ggplot2::ggsave() is used because
# current cowplot releases no longer export ggsave().
ggplot2::ggsave(
  here::here("figures/sample_selection/kinship_density.png"),
  width = 7,
  height = 10,
  dpi = 500
)
## Picking joint bandwidth of 0.0174

Defunct: By recorded pedigree relatedness

Outgroup samples

  • Maybe just remove everything with less than 4X coverage for now?

Modern breeds

Other